# library(dplyr)
# library(ggplot2)
library(RColorBrewer)
library(purrr) # For map()

# Data preparation: count rows per news source and country, then attach the
# matching row of the dt03 lookup table (joined on its V2 column).
pd <- data |>
  group_by(v03_news.source, v35_country) |>
  # .groups = "drop" returns an ungrouped result. Without it the output stays
  # grouped by v03_news.source, which triggers the "grouped output" message
  # and makes as.factor() below run once per group instead of on all rows.
  summarize(count = n(), .groups = "drop") |>
  left_join(dt03, by = join_by(v03_news.source == V2)) |>
  mutate(
    cn = paste(v35_country, V3), # combined "<country> <V3>" key used for fill
    cc = as.factor(v35_country)
  )
`summarise()` has grouped output by 'v03_news.source'. You can override using
the `.groups` argument.
# Define unique countries and palettes.
# The countries are sorted so the palette assignment does not depend on the
# row order of pd: with dk/fi/se this always yields dk = Blues, fi = Greens,
# se = Reds.
countries <- sort(unique(as.character(pd$v35_country)))
palettes <- c("Blues", "Greens", "Reds")
stopifnot(length(countries) <= length(palettes))
palettes <- palettes[seq_along(countries)]
names(palettes) <- countries

# Generate colors per group: every country gets a ramp over its own palette
# with one shade per distinct V3 value in that country.
pd <- pd |>
  group_by(v35_country) |>
  mutate(
    color_values = {
      # Single palette for this group (as.character guards against a factor
      # being used as a positional index).
      palette_name <- palettes[[as.character(v35_country[1])]]
      # Interpolate the 9-step Brewer palette to the number of V3 values.
      colors <- colorRampPalette(brewer.pal(9, palette_name))(n_distinct(V3))
      # Map colors to V3 levels.
      colors[as.numeric(factor(V3))]
    }
  ) |>
  ungroup()

# scale_fill_manual() assigns an unnamed `values` vector to the fill levels by
# position. pd$color_values is in row order, not in the order of the levels of
# cn, so passing it directly colours the wrong bars. A vector named by cn is
# matched by name instead.
fill_colors <- pd$color_values
names(fill_colors) <- pd$cn
fill_colors <- fill_colors[!duplicated(names(fill_colors))]

# Plot with faceting, per-country colour ramps, and light gray background
p1 <- ggplot(pd, aes(x = V3, y = count, fill = cn)) +
  geom_bar(stat = "identity", position = "dodge") +
  facet_wrap(~ v35_country, scales = "free_x") +
  scale_fill_manual(values = fill_colors, name = "News Source by Country") +
  labs(
    y = "Count",
    x = "News Source (V3)",
    title = "v03_news.source by v35_country"
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    panel.background = element_rect(fill = "lightgray", colour = NA) # Light gray background
  )

# Display the plot
p1
library(dplyr)
library(ggplot2)
library(RColorBrewer)

# Data preparation: select columns 35 and 33 by position and rename them
pd <- data |>
  select(35, 33) |>
  rename(v35 = 1, v33 = 2)

# Calculate means and standard errors by country
pd_summary <- pd |>
  group_by(v35) |>
  summarise(
    mean_v33 = mean(v33, na.rm = TRUE),
    # Standard error of the mean. The denominator is the number of
    # non-missing values: n() would also count NA rows that mean() and sd()
    # have already dropped, which understates the standard error.
    se_v33 = sd(v33, na.rm = TRUE) / sqrt(sum(!is.na(v33)))
  ) |>
  mutate(
    country_name = recode(v35, "dk" = "Denmark", "fi" = "Finland", "se" = "Sweden")
  )

# Bar plot with error bars using Dark2 palette
p1 <- ggplot(pd_summary, aes(x = country_name, y = mean_v33, fill = country_name)) +
  geom_bar(stat = "identity", alpha = 1.0) + # Bar plot for means
  geom_errorbar(
    aes(ymin = mean_v33 - se_v33, ymax = mean_v33 + se_v33),
    width = 0.2,
    linewidth = 0.8 # `size` for lines is deprecated since ggplot2 3.4.0
  ) + # Error bars for SEM
  scale_fill_brewer(palette = "Dark2") + # Dark2 palette
  labs(
    y = "Emancipation of Target Group",
    x = "Country",
    title = "Mean Emancipation of Target Group by Country"
  ) +
  theme_minimal() + # Clean theme
  theme(
    legend.position = "none", # Remove legend (redundant with x-axis)
    axis.title = element_text(size = 12, face = "bold"), # Bold, larger axis titles
    axis.text = element_text(size = 10), # Clear axis labels
    plot.title = element_text(size = 14, face = "bold", hjust = 0.5) # Centered, bold title
  )
Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
ℹ Please use `linewidth` instead.
# Render the bar chart built above
print(p1)
# Significance test: one-way ANOVA of v33 across the levels of v35
# (three levels: dk, fi, se).
anova_result <- aov(v33 ~ v35, data = pd)

# Print the ANOVA table (Df, sums of squares, F value, p-value)
summary(anova_result)
Df Sum Sq Mean Sq F value Pr(>F)
v35 2 75.9 37.96 11.68 1.69e-05 ***
Residuals 182 591.5 3.25
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Optional follow-up: pairwise t-tests between the countries, with
# Bonferroni-adjusted p-values, for specific group comparisons.
pairwise_t <- pairwise.t.test(
  pd$v33,
  pd$v35,
  p.adjust.method = "bonferroni"
)
print(pairwise_t)
Pairwise comparisons using t tests with pooled SD
data: pd$v33 and pd$v35
dk fi
fi 0.014 -
se 8.6e-06 0.058
P value adjustment method: bonferroni
Warning: Removed 3 rows containing missing values or values outside the scale range
(`geom_segment()`).
250302: pretty diagrams
frequency of categories
# if (!requireNamespace("colorblindr", quietly = TRUE)) install.packages("colorblindr", lib = "~/lib/r-cran")
# library(ggplot2)
# library(gridExtra)
# library(dplyr) # For string manipulation and recode
# library(stringr) # For str_trunc
# library(colorblindr) # For extended Okabe-Ito palette
library(RColorBrewer) # For ColorBrewer palettes

# get graphs
# For every variable number i, look up its table dtNN (zero-padded to two
# digits) and pass both to desc_get_brewer().
# NOTE(review): desc_get_brewer() is defined elsewhere; presumably it draws
# the chart as a side effect -- confirm.
for (i in c(4:9, 12:15, 18:32, 34)) {
  pd <- desc_get_brewer(data, get(paste0("dt", sprintf("%02d", i))), i, var_pl = TRUE)
  # get table: second element of the returned object
  print(pd[2])
  # write.table(pd[2], paste0("../tmp/haidi-table-v", sprintf("%02d", i), ".csv"), sep = "\t", quote = TRUE, row.names = FALSE)
}
`summarise()` has grouped output by 'v35_country'. You can override using the
`.groups` argument.
v04_article.type, count values by country and article type
Below is a summary interpretation of the chi-square test results:
Test Statistic and Significance:
The chi-square statistic is 72.89 with 6 degrees of freedom. The associated p-value is extremely small (approximately 1.04 × 10⁻¹³), which is far below any conventional significance level (e.g., 0.05). This provides strong evidence to reject the null hypothesis of independence.
Interpretation:
The results indicate that there is a statistically significant association between country (dk, fi, se) and article type (1, 2, 3, 4). In other words, the distribution of article types differs between the three countries.
Observed vs. Expected Frequencies:
The contingency table shows notable differences between observed counts and expected counts under the assumption of independence. For example:
In Denmark (dk), article type 2 was observed only 7 times while about 14.57 were expected, and article type 4 was observed 9 times compared to an expected 2.38.
In Finland (fi), article type 1 appears more frequently (observed 59 vs. expected 40.67), and no articles of type 4 were observed even though about 3.70 were expected.
In Sweden (se), article type 2 is observed 34 times, which is much higher than the expected count of about 17.84.
Conclusion:
The chi-square test clearly demonstrates that the type of article published is associated with the country. This suggests that factors related to the country may influence the distribution of article types.
In summary, the analysis shows that the differences in article type counts across Denmark, Finland, and Sweden are unlikely to be due to chance, pointing to a meaningful relationship between country and article type.
v05_article.size, count values by country and article size
The chi-square test yielded a statistic of approximately 70.19 with 4 degrees of freedom and a p-value of about 2.07×10⁻¹⁴. This extremely small p-value indicates that there is a statistically significant association between country (dk, fi, se) and article size (small, medium, large). In other words, the observed distribution of article sizes across the three countries is very unlikely to have occurred by chance if the two variables were independent.
Denmark (dk) – expected counts under independence: approximately 16.69 (small), 14.04 (medium), 18.28 (large)
The observed count for small articles is lower than expected, while medium and large are higher than expected.
Finland (fi) – expected counts under independence: approximately 25.88 (small), 21.77 (medium), 28.35 (large)
Here, there is an excess of small articles and a deficit of large articles compared to the expected frequencies.
Sweden (se) – expected counts under independence: approximately 20.43 (small), 17.19 (medium), 22.38 (large)
In Sweden, the observed count for large articles is much higher than expected, while small and medium counts are lower.
Conclusion:
The significant chi-square test result indicates that the distribution of article sizes is not independent of country. The differences between the observed and expected frequencies suggest that each country has a distinct pattern in article size distribution. For example, Finland appears to favor small articles, whereas Sweden shows a strong tendency toward large articles. These differences are statistically significant and suggest that factors related to each country may be influencing the article size choices.
v06_illustrated
The chi-square test produced a statistic of approximately 68.33 with 4 degrees of freedom and an extremely small p-value (≈ 5.12×10⁻¹⁴). This means we reject the null hypothesis of independence and conclude that there is a statistically significant association between country and illustration status.
Key Observations:
Overall Significance:
The p-value is far below any conventional significance threshold (e.g., 0.05), indicating that the differences in illustration status across countries are highly unlikely to be due to chance.
Country-Specific Patterns:
Denmark (dk):
Observed counts: 14 (no illustration, 0), 23 (one illustration, 1), 12 (two or more illustrations, 2).
Expected counts: approximately 17.58, 26.63, and 4.79 respectively.
Interpretation: Denmark shows a notable excess in the highest illustration category (v06_illustrated = 2) compared to expectations.
Finland (fi):
Observed counts: 49 (0), 25 (1), 2 (2).
Expected counts: approximately 27.26, 41.30, and 7.43 respectively.
Interpretation: Finland has many more articles with no illustrations (v06_illustrated = 0) and far fewer with one or multiple illustrations than expected.
Sweden (se):
Observed counts: 3 (0), 52 (1), 4 (2).
Expected counts: approximately 21.16, 32.07, and 5.77 respectively.
Interpretation: Sweden has far fewer non-illustrated articles than expected and a higher count for the one illustration category.
Conclusion:
There is clear evidence that the distribution of illustration status (v06_illustrated) differs significantly by country. Denmark tends to publish a higher proportion of articles with two or more illustrations than expected, Finland tends to have a high proportion of articles with no illustrations, and Sweden shows an overrepresentation in the one illustration category. These differences suggest that country-specific factors or editorial practices may be influencing the use of illustrations.
v07_type.of.illu
The chi-square test produced a statistic of approximately 42.02 with 8 degrees of freedom and an extremely small p-value (≈ 1.34×10⁻⁶). This provides very strong evidence against the null hypothesis of independence, indicating that the distribution of illustration types (v07_type_of_illustration) is significantly different across the three countries.
Key Points:
Overall Significance:
With a p-value far below conventional significance thresholds (e.g., 0.05), we conclude that the type of illustration used is not independent of the country. In other words, the distribution of illustration types varies significantly by country.
Observed vs. Expected Frequencies:
Denmark (dk):
Type 1: Observed 15 vs. Expected ≈ 20.04
Type 3: Observed 9 vs. Expected ≈ 2.69
Type 4: Observed 6 vs. Expected ≈ 2.39
Type 5: Observed 2 vs. Expected ≈ 0.90
These differences suggest that Denmark has fewer type 1 illustrations and considerably more type 3 and type 4 illustrations than would be expected if there were no association.
Finland (fi):
Type 1: Observed 14 vs. Expected ≈ 14.89
Type 2: Observed 12 vs. Expected ≈ 6.67
No articles were observed for types 3, 4, or 5, whereas some counts were expected (e.g., type 3 expected ≈ 2.00).
This indicates that Finland has an overrepresentation of type 2 illustrations and a lack of articles in the higher illustration categories.
Sweden (se):
Type 1: Observed 38 vs. Expected ≈ 32.07
Type 2: Observed 15 vs. Expected ≈ 14.36
Type 3: Observed 0 vs. Expected ≈ 4.31
Type 4: Observed 2 vs. Expected ≈ 3.83
Type 5: Observed 1 vs. Expected ≈ 1.44
For Sweden, the count for type 1 is slightly higher than expected, while there are fewer type 3 illustrations than expected.
Conclusion:
The significant chi-square statistic indicates that the type of illustration is associated with the country. Each country shows a distinct pattern:
- Denmark appears to favor higher counts in types 3 and 4.
- Finland shows a strong emphasis on type 2 illustrations with a near absence of higher illustration categories.
- Sweden has a relatively high count of type 1 illustrations and lower counts for type 3.
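These findings suggest that country-specific factors or editorial policies may influence the choice or prevalence of illustration types in articles. Note, however, that several expected cell counts are below 5 (e.g., ≈ 0.90 and ≈ 1.44 for type 5), so the chi-square approximation may be unreliable for this table; Fisher's exact test or a simulated p-value would be a useful robustness check.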
v08_target.group
The chi-square test produced a statistic of about 20.79 with 2 degrees of freedom and a very small p-value (≈ 3.06×10⁻⁵). This p-value is far below any common significance threshold (e.g., 0.05), meaning we reject the null hypothesis that the country and target group in illustration are independent.
Key Points:
Statistical Significance:
The small p-value indicates that the observed differences in target group distribution across the three countries (dk, fi, se) are unlikely to have occurred by chance.
Observed vs. Expected Frequencies:
For Denmark (dk):
Observed: 27 for group 0 and 2 for group 1
Expected: Approximately 20.3 for group 0 and 8.7 for group 1
This suggests Denmark has more group 0 and fewer group 1 articles than expected under independence.
For Finland (fi):
Observed: 4 for group 0 and 11 for group 1
Expected: About 10.5 for group 0 and 4.5 for group 1
This indicates Finland has an overrepresentation of group 1 and an underrepresentation of group 0.
For Sweden (se):
Observed: 39 for group 0 and 17 for group 1
Expected: Roughly 39.2 for group 0 and 16.8 for group 1
The observed counts for Sweden are very close to what would be expected.
Overall Interpretation:
The significant chi-square test result tells us that the distribution of the target group in illustration (group 0 vs. group 1) differs by country. Denmark and Finland show marked deviations from the expected frequencies under the assumption of independence, while Sweden’s distribution is close to the expected values. This indicates that country-specific factors may be influencing the categorization into target groups.
In summary, there is strong evidence of an association between the country and the target group in illustration.
# NOTE(review): in the extracted text the statements below are preceded by
# `#`; the rendered table that follows shows they were executed, so the `#`
# markers are treated as stray empty comment lines -- confirm against the
# original chunk.
if (TRUE) {
  # Count rows per "<country> <V3>" key (cn) and four-digit year taken from
  # v02_date.
  result_df <- data |>
    left_join(dt03, by = join_by(v03_news.source == V2)) |>
    mutate(
      cn = paste(v35_country, V3),
      cc = as.factor(v35_country),
      v36_date_year = str_extract(v02_date, "\\d{4}") # first run of 4 digits
    ) |>
    group_by(cn, v36_date_year) |>
    # Explicit .groups silences the "grouped output by 'cn'" message and
    # returns a plain, ungrouped table.
    summarize(count = n(), .groups = "drop")
  # Emit the counts as a markdown table
  cat(simplermarkdown::md_table(result_df))
}
`summarise()` has grouped output by 'cn'. You can override using the `.groups`
argument.
cn
v36_date_year
count
dk EB
2017
1
dk FS
2017
1
dk FS
2018
2
dk FS
2019
3
dk FS
2021
3
dk FS
2022
1
dk JV
2017
2
dk JV
2018
1
dk JV
2019
1
dk JV
2021
1
dk JV
2022
2
dk NJ
2018
1
dk NJ
2020
1
dk NJ
2022
1
dk PO
2017
3
dk PO
2018
2
dk PO
2019
9
dk PO
2020
4
dk PO
2021
5
dk PO
2022
5
fi HS
2017
2
fi HS
2018
1
fi HS
2019
7
fi HS
2020
3
fi HS
2021
2
fi IS
2017
1
fi IS
2020
2
fi IS
2022
1
fi K
2017
1
fi K
2019
1
fi K
2020
4
fi K
2021
3
fi K
2022
5
fi LK
2017
3
fi LK
2020
2
fi LK
2021
5
fi LK
2022
5
fi TS
2017
5
fi TS
2018
1
fi TS
2019
6
fi TS
2020
5
fi TS
2021
3
fi TS
2022
8
se AB
2017
2
se AB
2018
2
se AB
2019
1
se AB
2020
2
se AB
2021
1
se AB
2022
1
se DN
2017
1
se DN
2018
3
se DN
2019
3
se DN
2020
1
se DN
2021
4
se DN
2022
1
se GP
2017
4
se GP
2018
3
se GP
2019
2
se GP
2020
1
se GP
2021
1
se GP
2022
1
se SDS
2018
1
se SDS
2019
2
se SDS
2020
3
se SDS
2021
3
se SDS
2022
1
se VK
2017
2
se VK
2018
2
se VK
2019
7
se VK
2020
2
se VK
2021
2
se VK
2022
1
# Disabled chunk (condition is FALSE, so nothing here runs): would list
# country and extracted four-digit year for every row as a markdown table.
# NOTE(review): the `#` markers fused into the extracted text are treated as
# stray empty comment lines -- confirm against the original chunk.
if (FALSE) {
  result_df <- data |>
    mutate(v36_date_year = str_extract(v02_date, "\\d{4}")) |>
    select(v35_country, v36_date_year)
  cat(simplermarkdown::md_table(result_df))
}
# Mean of v05_article.size for each v04_article.type, with the matching row
# of the dt04 lookup table attached (joined on its V2 column).
pd <- data |>
  group_by(v04_article.type) |>
  summarize(mean = mean(v05_article.size)) |>
  left_join(
    dt04,
    by = join_by(v04_article.type == V2)
  )
Scale for fill is already present.
Adding another scale for fill, which will replace the existing scale.
frequency charts
# get graphs
# For every variable number i, look up its table dtNN (zero-padded to two
# digits) and pass both to desc_get().
# NOTE(review): desc_get() is defined elsewhere; presumably it draws the
# chart as a side effect -- confirm.
for (i in c(4:9, 12:15, 18:32, 34)) {
  pd <- desc_get(data, get(paste0("dt", sprintf("%02d", i))), i, var_pl = TRUE)
  # get table: second element of the returned object
  print(pd[2])
  # write.table(pd[2], paste0("../tmp/haidi-table-v", sprintf("%02d", i), ".csv"), sep = "\t", quote = TRUE, row.names = FALSE)
}
`summarise()` has grouped output by 'v35_country'. You can override using the
`.groups` argument.
library(RColorBrewer)
# display.brewer.all()

# custom theme: thicker major grid lines
some_graph <- theme(panel.grid.major = element_line(linewidth = 2))
some_color <- c("deeppink", "chartreuse", "midnightblue")

# put the elements in a list so they can be added to a plot in one step
# Alternative using the manual colours above. It used to be assigned and then
# immediately overwritten by the Brewer version, so it never took effect:
# theme_haidi <- list(some_graph, scale_color_manual(values = some_color))
theme_haidi <- list(some_graph, scale_colour_brewer(palette = "Blues"))
Scale for fill is already present.
Adding another scale for fill, which will replace the existing scale.
# Select columns 35 and 33 by position and give them short names
pd <- data |>
  select(35, 33) |>
  rename(v35 = 1, v33 = 2)

# Box plot of v33 per country with the group mean overlaid
p1 <- ggplot(pd, aes(x = v35, y = v33, fill = v35)) +
  geom_boxplot(alpha = 1.0) +
  # stat_summary() defaults to a pointrange, which needs ymin/ymax. With only
  # `fun` supplied those are missing, and ggplot2 warns about removed rows in
  # geom_segment(). Drawing the mean as a point avoids the empty segments.
  stat_summary(fun = mean, geom = "point") +
  labs(
    y = "v33_power.sum.index",
    x = "v35_country",
    title = "v33_power.sum.index by v35_country"
  ) +
  theme(legend.position = "none")
# p1
Warning: Removed 3 rows containing missing values or values outside the scale range
(`geom_segment()`).
# Run desc_get() for variables 33 and 34, each with its table dt33 / dt34
for (i in 33:34) {
  pl <- desc_get(data, get(sprintf("dt%02d", i)), i)
}
`summarise()` has grouped output by 'v35_country'. You can override using the
`.groups` argument.
`summarise()` has grouped output by 'v35_country'. You can override using the
`.groups` argument.
231018: descriptives
# Run desc_get() for variables 22 to 32, each with its table dt22 ... dt32
for (i in 22:32) {
  pl <- desc_get(data, get(sprintf("dt%02d", i)), i)
}
`summarise()` has grouped output by 'v35_country'. You can override using the
`.groups` argument.
`summarise()` has grouped output by 'v35_country'. You can override using the
`.groups` argument.
`summarise()` has grouped output by 'v35_country'. You can override using the
`.groups` argument.
`summarise()` has grouped output by 'v35_country'. You can override using the
`.groups` argument.
`summarise()` has grouped output by 'v35_country'. You can override using the
`.groups` argument.
`summarise()` has grouped output by 'v35_country'. You can override using the
`.groups` argument.
`summarise()` has grouped output by 'v35_country'. You can override using the
`.groups` argument.
`summarise()` has grouped output by 'v35_country'. You can override using the
`.groups` argument.
`summarise()` has grouped output by 'v35_country'. You can override using the
`.groups` argument.
`summarise()` has grouped output by 'v35_country'. You can override using the
`.groups` argument.
`summarise()` has grouped output by 'v35_country'. You can override using the
`.groups` argument.
# Run desc_get() for variables 18 to 21, each with its table dt18 ... dt21
for (i in 18:21) {
  pl <- desc_get(data, get(sprintf("dt%02d", i)), i)
}
`summarise()` has grouped output by 'v35_country'. You can override using the
`.groups` argument.
`summarise()` has grouped output by 'v35_country'. You can override using the
`.groups` argument.
`summarise()` has grouped output by 'v35_country'. You can override using the
`.groups` argument.
`summarise()` has grouped output by 'v35_country'. You can override using the
`.groups` argument.
231017: descriptives
# Run desc_get() for variables 12 to 15, each with its table dt12 ... dt15
for (i in 12:15) {
  pl <- desc_get(data, get(sprintf("dt%02d", i)), i)
}
`summarise()` has grouped output by 'v35_country'. You can override using the
`.groups` argument.
`summarise()` has grouped output by 'v35_country'. You can override using the
`.groups` argument.
`summarise()` has grouped output by 'v35_country'. You can override using the
`.groups` argument.
`summarise()` has grouped output by 'v35_country'. You can override using the
`.groups` argument.
231016: descriptives
# Run desc_get() for variables 6 to 9; the table names are zero-padded
# (dt06 ... dt09)
for (i in 6:9) {
  pl <- desc_get(data, get(sprintf("dt%02d", i)), i)
}
`summarise()` has grouped output by 'v35_country'. You can override using the
`.groups` argument.
`summarise()` has grouped output by 'v35_country'. You can override using the
`.groups` argument.
`summarise()` has grouped output by 'v35_country'. You can override using the
`.groups` argument.
`summarise()` has grouped output by 'v35_country'. You can override using the
`.groups` argument.
# write.table(data, "../csv/haidi-dk-2.tsv", sep="\t", quot=T, row.names=F)

# Reshape to wide format (10 content units * 28 content vars): one row per
# coder_id, columns 3 to 30 spread by the values of `some`; the coder_id
# column is dropped afterwards.
data <- data |>
  pivot_wider(
    id_cols = coder_id,
    names_from = some,
    values_from = 3:30
  ) |>
  select(-coder_id)
# data
# write.table(data, "../csv/haidi-fi.tsv", sep="\t", quot=T, row.names=F)

# Reshape to wide format (10 content units * 28 content vars): one row per
# coder_id, columns 3 to 30 spread by the values of `some`; the coder_id
# column is dropped afterwards.
data <- data |>
  pivot_wider(
    id_cols = coder_id,
    names_from = some,
    values_from = 3:30
  ) |>
  select(-coder_id)
# data
# write.table(data, "../csv/haidi-dk.tsv", sep="\t", quot=T, row.names=F)

# Reshape to wide format (10 content units * 28 content vars): one row per
# coder_id, columns 3 to 30 spread by the values of `some`; the coder_id
# column is dropped afterwards.
data <- data |>
  pivot_wider(
    id_cols = coder_id,
    names_from = some,
    values_from = 3:30
  ) |>
  select(-coder_id)
# data
# write.table(data, "../csv/haidi-se.tsv", sep="\t", quot=T, row.names=F)

# Reshape to wide format (10 content units * 28 content vars): one row per
# coder_id, columns 3 to 30 spread by the values of `some`; the coder_id
# column is dropped afterwards.
data <- data |>
  pivot_wider(
    id_cols = coder_id,
    names_from = some,
    values_from = 3:30
  ) |>
  select(-coder_id)
# data
# A tibble: 10 × 6
coder_id content_id x1 x3 x9 x10
<chr> <int> <chr> <dbl> <dbl> <chr>
1 a 1 S001 102 99 Dolda larmsiffrorna: Så dåligt mår 85-…
2 a 2 S002 103 99 Satsningar som räddar liv
3 a 3 S003 102 99 Detta måste ni rätta till i vården, po…
4 a 4 S004 101 99 Sju utmaningar - därför är det kris i …
5 a 5 S005 104 99 De kommande årens satsningar sker i pr…
6 b 1 S006 102 0 De har full koll på senioren
7 b 2 S007 103 99 Mossig kritik mot vårdappar
8 b 3 S008 101 99 Folksjukdomar som kan förvärras i spår…
9 b 4 S009 101 99 Så vill regeringen möta utmaningarna i…
10 b 5 S010 105 99 Tekniken ska avlasta personalen
# Transform data: one row per coder_id and one column per content_id, filled
# with the x3 codes; the coder_id column is dropped afterwards.
data <- data |>
  pivot_wider(
    id_cols = coder_id,
    names_from = content_id,
    values_from = x3
  ) |>
  select(-coder_id)
# https://rpubs.com/jacoblong/content-analysis-krippendorff-alpha-R
data
Krippendorff’s alpha ranges from -1 to 1: a value of 1 represents unanimous agreement between the raters, 0 indicates agreement no better than chance (as if the raters were guessing randomly), and negative values suggest that the raters are systematically disagreeing. As suggested by Krippendorff, alphas above 0.8 are considered very good agreement, and tentative conclusions can be drawn from data where α ≥ 0.667.
# A tibble: 10 × 5
content_id coder_id var1 var2 var3
<dbl> <chr> <dbl> <chr> <lgl>
1 1 A 1 Red FALSE
2 2 A 3 Blue TRUE
3 3 A 5 Blue TRUE
4 4 A 7 Green TRUE
5 5 A 1 Red FALSE
6 1 B 1 Red FALSE
7 2 B 3 Blue FALSE
8 3 B 3 Green FALSE
9 4 B 7 Green TRUE
10 5 B 3 Red FALSE